% This script uses the reduced-form coefficients from estimateSTAR.m to
% test the null hypothesis of Poisson random networks (PRN)
% Date: April 29, 2019
clear; close all; clc; 
% filepath = 'C:\Users\xt9\Box Sync\misclassifiedNetwork\data\';
filepath = 'C:\Users\xt9\Dropbox\Arthur_Xi_Xun\unobserved_links\final submission files\Data\';
rng(5,'v5uniform');
% load estimates from original and bootstrap samples, created by mainSTAR.m
load([filepath 'estSTAR.mat']);  load([filepath 'BSestSTAR.mat']); 
options = optimset('TolX',1e-8,'TolFun',1e-8,'MaxFunEvals',1e8,...
                       'MaxIter',1e7,'Display','off'); global B lg;
                   
%% test the null of Poisson random network using CMD criterion functions
% p0(:,:,1)  = [ .42 .32; .45 .40]; % choosing initial values
% p0(:,:,2)  = [ .40 .30; .43 .38];
% p0(:,:,3)  = [ .38 .28; .41 .36];
rng(1,'v5uniform');  p0 = rand(2,2,3); 
strings = {'small,dispersed d.o.b.','large,dispersed d.o.b.'; ...
           'small,concentrated d.o.b.','large,concentrated d.o.b.'};
disp('Minimum Chi-square Distance Statistics: ');
for i = 1:2, % loop in dispersion of d.o.b. (more vs less dispersed)
    for j = 1:2, % loop in class size (small vs large)
        wm = calWM(i,j,Brcm); % calculate weight matrix to be used in CMD     
        beta = BIND(3:5); gamma = [0;BIND(6:7)]; lambda = BIND(j);
        mu = rcmInd{i,j}; n = UB(i,j); lb = LB(i,j); ub = UB(i,j);
        n1 = floor(n/3); n2 = floor(2*n/3); % partition group into three blocks
        l = [n1 n2-n1 n-n2]; ind = {1:n1,n1+1:n2, n2+1:n}; % index for each block
        % define handles for objective functions 
        fH = @(p) fitObjFun_vp(p,beta,gamma,lambda,mu,n,lb,ub,ind,l,wm); 
        % estimate link formation prob
        [phat(i,j,:),fval(i,j),exitFlag(i,j)] = fmincon(fH,squeeze(p0(i,j,:)),...
                              [],[],[],[],zeros(3,1),ones(3,1),[],options);   
        % report test of overidentification
        disp(strings{i,j}); disp(sprintf('W_n = %g',fval(i,j)));
    end;
end;
save([filepath 'linkFormProb.mat'],'phat','fval','exitFlag'); phat,

%% counterfactual exercises 
for i = 1:2, % loop in dispersion of d.o.b. (more vs less dispersed)
    for j = 1:2, % loop in class size (small vs large)
        beta = BIND(3:5); gamma = [0;BIND(6:7)]; lambda = BIND(j);
        mu = rcmInd{i,j}; lb = LB(i,j); ub = UB(i,j);
        % avg outcome reported in data 
        OYbar{i,j}  = nanmean(OYt{i,j},1); 
        % avg predicted outcome from reduced-form OLS
        PY{i,j} = BCON(j)/(1-BIND(j)) + repmat(OclsXt{i,j}*BCLS(j),lb,1) + ...
                       mu(:,:,1)*imputeNaN(squeeze(OindXt{i,j}(1,:,:)),1,1) + ...
                       mu(:,:,2)*imputeNaN(squeeze(OindXt{i,j}(2,:,:)),1,1) +...
                       mu(:,:,3)*imputeNaN(squeeze(OindXt{i,j}(3,:,:)),1,1) ; 
        PYbar{i,j}  = nanmean(PY{i,j},1);
        % c.f. outcome 
        cfG  = (ones(ub,ub)-eye(ub))/(ub-1);
        for ki = 1:size(mu,3),
            % impose complete network
            cnMu{i,j}(:,:,ki)  = inv(eye(ub)-BIND(j)*cfG)*(beta(ki)*eye(ub)-gamma(ki)*cfG);
            % impose complete network and swap peer effects
            cfMu{i,j}(:,:,ki)  = inv(eye(ub)-BIND(3-j)*cfG)*(beta(ki)*eye(ub)-gamma(ki)*cfG); 
        end;
        % c.f. if G is "complete"
        DY{i,j} = BCON(j)/(1-BIND(j)) + repmat(OclsXt{i,j}*BCLS(j),lb,1) + ...
                       cnMu{i,j}(1:lb,:,1)*imputeNaN(squeeze(OindXt{i,j}(1,:,:)),1,1) + ...
                       cnMu{i,j}(1:lb,:,2)*imputeNaN(squeeze(OindXt{i,j}(2,:,:)),1,1) +...
                       cnMu{i,j}(1:lb,:,3)*imputeNaN(squeeze(OindXt{i,j}(3,:,:)),1,1) ;
        DY{i,j} = max(min(DY{i,j},700),550);
        DYbar{i,j}  = nanmean(DY{i,j},1);
        % c.f. if G is "complete" and peer effects are swapped
        CY{i,j} = BCON(j)/(1-BIND(j)) + repmat(OclsXt{i,j}*BCLS(j),lb,1) + ...
                       cfMu{i,j}(1:lb,:,1)*imputeNaN(squeeze(OindXt{i,j}(1,:,:)),1,1) + ...
                       cfMu{i,j}(1:lb,:,2)*imputeNaN(squeeze(OindXt{i,j}(2,:,:)),1,1) +...
                       cfMu{i,j}(1:lb,:,3)*imputeNaN(squeeze(OindXt{i,j}(3,:,:)),1,1) ;
        CY{i,j} = max(min(CY{i,j},700),550);
        CYbar{i,j}  = nanmean(CY{i,j},1);       
        
%         % scatter plot that shows difference in test performance
%         figure((i-1)*2+j); 
%         subplot(3,1,1); hist(OYbar{i,j},100); title('observed');
%         subplot(3,1,2); hist(PYbar{i,j},100); title('predicted');
%         subplot(3,1,3); hist(CYbar{i,j},100); title('c.f.');
%         suplabel(['Counterfactual: ',strings{i,j}],'t');   
        % hypothesis tests
        [h0,p0] = ttest2(OYbar{i,j},DYbar{i,j},'alpha',.05,'vartype','unequal');
        H0(i,j) = h0; P0(i,j) = p0; 
        disp(strings{i,j}); 
        disp(sprintf('diff. mean btw DY and OY = %g',nanmean(DYbar{i,j}-OYbar{i,j}))); 
        % disp(sprintf('c.f. mean = %g',nanmean(Ybar2{i,j}))); 
        disp(sprintf('p = %g',p0));       
        
        [h,p] = ttest2(OYbar{i,j},CYbar{i,j},'alpha',.05,'vartype','unequal');
        H(i,j) = h; P(i,j) = p; 
        disp(strings{i,j}); 
        disp(sprintf('diff. mean btw CY and OY = %g',nanmean(CYbar{i,j}-OYbar{i,j}))); 
        % disp(sprintf('c.f. mean = %g',nanmean(Ybar2{i,j}))); 
        disp(sprintf('p = %g',p));
        
    end;
end;

 